# Importing the required libraries
import pandas as pd
import pandas_profiling as pp
# Load the raw vehicle listings CSV into the `data` DataFrame.
data = pd.read_csv(
    filepath_or_buffer="/users/abhishekkumar/downloads/vehicles.csv",
)
# Data profiling: build a pandas-profiling report over the whole frame.
# NOTE(review): the report object is not assigned or saved; presumably this
# relies on notebook cell display to render it -- confirm.
pp.ProfileReport(data)
# As expected from the data points;
# Data cleaning: build `updata`, a cleaned frame derived from `data`.
# Every step returns a new DataFrame, so `data` itself is left unmodified.

# Drop the columns that are not carried into the cleaned output.
updata = data.drop(columns=["carfax_url", "passengers"])

# Keep one row per VIN (drop_duplicates keeps the first occurrence by default).
# NOTE(review): rows with a missing VIN compare as duplicates of each other,
# so at most one of them survives -- confirm that is intended.
updata = updata.drop_duplicates(subset=["vin"])

# Handling missing values: replace missing entries with placeholder text.
# This is done as one DataFrame-level fillna and reassigned, rather than
# `updata[col].fillna(..., inplace=True)`: the chained in-place form mutates a
# temporary column object and does not update `updata` under pandas
# Copy-on-Write.
updata = updata.fillna(
    {
        "engine": "Value not available",
        "description": "Description not available",
    }
)

# Export the cleaned data. to_csv is left at its default of also writing the
# DataFrame index as the first column.
updata.to_csv('/users/abhishekkumar/downloads/vehicle_data_file.csv')